In [13]:
import numpy as np

def compute_depth(order_book_side):
    """Return the total depth (sum of all posted quantities) on one book side.

    Parameters:
    - order_book_side: iterable of (price, quantity) tuples.
    """
    total = 0
    for _price, quantity in order_book_side:
        total += quantity
    return total

def compute_slope(order_book_side):
    """Return the slope of the bid/ask curve.

    The slope is the price gap between the two best levels divided by the
    quantity at the best level. Returns None when fewer than two levels
    exist or the best-level quantity is not positive.
    """
    if len(order_book_side) < 2:
        # A slope needs at least two price levels.
        return None
    best_price, best_qty = order_book_side[0]
    second_price = order_book_side[1][0]
    if best_qty <= 0:
        return None
    return (best_price - second_price) / best_qty

def compute_quantity_weighted_price(order_book_side):
    """Return the quantity-weighted average price of one book side.

    Computes sum(p_i * q_i) / sum(q_i); returns None when the side carries
    zero total quantity (including the empty side).
    """
    notional = 0
    quantity = 0
    for price, size in order_book_side:
        notional += price * size
        quantity += size
    if quantity == 0:
        return None
    return notional / quantity

def compute_quantity_weighted_mid_quote(bid_side, ask_side):
    """Return the quantity-weighted mid-quote (WMid).

    Midpoint of the quantity-weighted bid and ask prices; None when either
    side has no quantity.
    """
    bid_wp = compute_quantity_weighted_price(bid_side)
    ask_wp = compute_quantity_weighted_price(ask_side)
    if bid_wp is not None and ask_wp is not None:
        return 0.5 * (bid_wp + ask_wp)
    return None

def compute_quantity_weighted_bid_ask_spread(bid_side, ask_side):
    """Return the quantity-weighted bid-ask spread (WSpread).

    Quantity-weighted ask price minus quantity-weighted bid price; None
    when either side has no quantity.
    """
    bid_wp = compute_quantity_weighted_price(bid_side)
    if bid_wp is None:
        return None
    ask_wp = compute_quantity_weighted_price(ask_side)
    if ask_wp is None:
        return None
    return ask_wp - bid_wp

def compute_mid_quote_difference(mid_quote, bid_side, ask_side):
    """Return the signed gap between the plain mid-quote and the
    quantity-weighted mid-quote, or None when the latter is undefined."""
    weighted_mid = compute_quantity_weighted_mid_quote(bid_side, ask_side)
    return None if weighted_mid is None else mid_quote - weighted_mid

def get_order_book(timestamp, df):
    """Return the (bid_side, ask_side) book snapshot at `timestamp`.

    Each side is a list of (price, size) tuples taken from rows whose
    'Timestamp' equals `timestamp`: bids sorted best-first (descending
    price), asks best-first (ascending price).
    """
    snapshot = df[df['Timestamp'] == timestamp]

    def side_levels(side_label, ascending):
        # One side's price levels, sorted so the best quote comes first.
        levels = snapshot[snapshot['Side'] == side_label][['Price', 'Size']]
        levels = levels.sort_values(by='Price', ascending=ascending)
        return list(levels.itertuples(index=False, name=None))

    return side_levels('bid', False), side_levels('ask', True)

import pandas as pd

def compute_orderbook_changes(orderbook_df):
    """
    Computes the changes in the order book at each timestamp sequentially.

    Parameters:
    - orderbook_df: DataFrame with ['Price', 'Size', 'Side', 'Timestamp'].
      The input frame is NOT modified (a copy is taken internally; the
      original version mutated the caller's 'Timestamp' column in place).

    Returns:
    - DataFrame with columns ['Timestamp', 'Price', 'Side', 'Prev_Size',
      'New_Size', 'Size_Change'] holding one row per (price, side) whose
      size changed between consecutive timestamps; 'Timestamp' is the
      later of the two compared snapshots.
    """
    # Work on a copy so the caller's DataFrame is never mutated.
    orderbook_df = orderbook_df.copy()
    orderbook_df['Timestamp'] = pd.to_datetime(orderbook_df['Timestamp'])

    # Deduplicate first, then sort for a consistent snapshot ordering.
    orderbook_df = (
        orderbook_df
        .drop_duplicates()
        .sort_values(by=["Timestamp", "Price"])
        .reset_index(drop=True)
    )

    changes = []
    # unique() on the sorted column yields timestamps in ascending order.
    timestamps = orderbook_df['Timestamp'].unique()

    # Compare each consecutive pair of snapshots.
    for t1, t2 in zip(timestamps[:-1], timestamps[1:]):
        ob_t1 = orderbook_df[orderbook_df['Timestamp'] == t1].set_index(['Price', 'Side'])['Size']
        ob_t2 = orderbook_df[orderbook_df['Timestamp'] == t2].set_index(['Price', 'Side'])['Size']

        # Per-(price, side) size delta; levels missing on one side count as 0.
        size_changes = ob_t2.subtract(ob_t1, fill_value=0)

        # Keep only levels that actually changed, together with old/new size.
        for (price, side), change in size_changes.items():
            if change != 0:
                prev_size = ob_t1.get((price, side), 0)  # 0 if the level did not exist before
                new_size = ob_t2.get((price, side), 0)   # 0 if the level disappeared
                changes.append([t2, price, side, prev_size, new_size, change])

    return pd.DataFrame(
        changes,
        columns=['Timestamp', 'Price', 'Side', 'Prev_Size', 'New_Size', 'Size_Change'],
    )

Limit Order Book Metrics¶

1. Total Depth¶

The total depth on one side of the order book is the sum of all available quantities:

$ \text{Depth} = \sum_{i} q_i $

where $q_i$ is the quantity available at each price level $i$.

2. Slope of the Order Book¶

The slope of the bid or ask curve measures how quickly the price changes with respect to quantity:

$ \text{Slope}_{\text{bid}} = \frac{p_{\text{bid},1} - p_{\text{bid},2}}{q_{\text{bid},1}} $

where:

  • $p_{\text{bid},1}$ is the best (highest) bid price,
  • $p_{\text{bid},2}$ is the second-best bid price,
  • $q_{\text{bid},1}$ is the quantity available at $p_{\text{bid},1}$.

The ask slope is defined analogously.

3. Quantity-Weighted Price¶

The quantity-weighted price for bids or asks is:

$ \text{WP}_{\text{side}} = \frac{\sum_{i} p_{\text{side},i} \cdot q_{\text{side},i}}{\sum_{i} q_{\text{side},i}} $

where:

  • $p_{\text{side},i}$ is the price at level $i$ on the given side (bid or ask),
  • $q_{\text{side},i}$ is the corresponding quantity.

4. Quantity-Weighted Mid-Quote¶

The quantity-weighted mid-quote is the average of the quantity-weighted bid and ask prices:

$ \text{WMid} = \frac{\text{WP}_{\text{bid}} + \text{WP}_{\text{ask}}}{2} $

where:

  • $\text{WP}_{\text{bid}}$ is the quantity-weighted bid price,
  • $\text{WP}_{\text{ask}}$ is the quantity-weighted ask price.

5. Quantity-Weighted Bid-Ask Spread¶

The quantity-weighted bid-ask spread is given by:

$ \text{WSpread} = \text{WP}_{\text{ask}} - \text{WP}_{\text{bid}} $

which represents the difference between the quantity-weighted ask and bid prices.

6. Mid-Quote Difference¶

The difference between the regular mid-quote and the quantity-weighted mid-quote is:

$ \text{Mid-Quote Difference} = \text{MidQuote} - \text{WMid} $

where $\text{MidQuote}$ is the traditional mid-point between the best bid and ask prices.

In [26]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import pandas as pd
import json
import warnings

warnings.simplefilter("ignore")

# Load matchup metadata; keys are of the form "TEAMA vs. TEAMB".
with open("matchup_details.json", "r") as file:
    matchups = json.load(file)

# Parse each matchup key into a (team1, team2) tuple.
matchups_list = [(key.split(' ')[0], key.split(' ')[2]) for key in matchups.keys()]

# Keep only the most recent matchup per team: when a team reappears, drop
# its earlier matchup and shift the stored positions down by one.
team_indices = {}        # team name -> index of its matchup in filtered_matchups
filtered_matchups = []

for team1, team2 in matchups_list:
    if team1 in team_indices or team2 in team_indices:
        # NOTE(review): if team1 and team2 occur in *different* earlier
        # matchups, only one of those matchups is removed — confirm intended.
        old_index = team_indices.get(team1, team_indices.get(team2))
        if old_index is not None:
            filtered_matchups.pop(old_index)
            for key in team_indices:
                if team_indices[key] > old_index:
                    team_indices[key] -= 1
    team_indices[team1] = len(filtered_matchups)
    team_indices[team2] = len(filtered_matchups)
    filtered_matchups.append((team1, team2))

# Outcome dictionaries for the surviving matchups.
id_filtered = [matchups[" vs. ".join(pair)]['outcomes']
               for pair in filtered_matchups if " vs. ".join(pair) in matchups]

# Per game: the list of outcome identifiers (one parquet file per outcome).
gamelist = [list(outcomes.keys()) for outcomes in id_filtered]

for keys in gamelist[2:]:
    # Read each outcome's book once; the deduplicated frame is reused for
    # the per-timestamp snapshots below instead of re-reading the parquet
    # file on every iteration of the timestamp loop.
    team_data_changes = {}
    team_books = {}
    for team in keys:
        df = pd.read_parquet(team + '.parquet')
        team_books[team] = df.drop_duplicates()
        bid_changes = compute_orderbook_changes(df[df.Side == 'bid'])
        ask_changes = compute_orderbook_changes(df[df.Side == 'ask'])
        team_data_changes[team] = pd.concat([bid_changes, ask_changes])

    fig, axes = plt.subplots(1, 2, figsize=(30, 8), sharex=True, sharey=True, facecolor=(1, 1, 1))

    for col, team in enumerate(keys[:2]):  # Only plot the first two outcomes
        changes = team_data_changes[team]
        bid_data_changes = changes[changes.Side == 'bid']
        ask_data_changes = changes[changes.Side == 'ask']

        # Squares mark bid-side changes, circles ask-side changes; both are
        # colored by the signed size change.
        axes[col].scatter(
            bid_data_changes["Timestamp"], bid_data_changes["Price"],
            c=bid_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='s', label="Bid Changes"
        )
        axes[col].scatter(
            ask_data_changes["Timestamp"], ask_data_changes["Price"],
            c=ask_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='o', label="Ask Changes"
        )

        # Quantity-weighted liquidity metrics at every change timestamp.
        timestamps = sorted(changes['Timestamp'].unique())
        mid_quotes, bid_ask_spreads = [], []
        for timestamp in timestamps:
            bid_side, ask_side = get_order_book(timestamp, team_books[team])
            mid_quotes.append(compute_quantity_weighted_mid_quote(bid_side, ask_side))
            bid_ask_spreads.append(compute_quantity_weighted_bid_ask_spread(bid_side, ask_side))

        axes[col].plot(timestamps, mid_quotes, label="Quantity-Weighted Mid-Quote", marker='o', color='black', markersize=10)
        axes[col].plot(timestamps, bid_ask_spreads, label="Quantity-Weighted Bid-Ask Spread", marker='s', color='red', markersize=12)

        axes[col].set_xlabel("Timestamp", fontsize=25)
        axes[col].set_ylabel("Price Level", fontsize=25)
        axes[col].set_title(f"{team} - Bid & Ask Changes", fontsize=35)
        axes[col].tick_params(axis="x", rotation=45)
        axes[col].grid(True)

    # One shared legend for both panels.
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=10, fontsize=20)
    plt.suptitle(f'Order Book Visualization for {team}', fontsize=40)
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.show()

Order Book Liquidity and Spread Analysis¶

1) Testing the Hypothesis¶

To test the hypothesis that order book liquidity is unevenly distributed and market depth affects bid-ask spread fluctuations, we can analyze the following:

  • Market Depth:

    • Compute the total volume at each price level for bid and ask sides.
    • Compare top-level liquidity (best bid/ask) vs. cumulative depth at different levels.
  • Spread Volatility vs. Liquidity:

    • Compute the standard deviation of the quantity-weighted bid-ask spread.
    • Compare spread fluctuations against total available liquidity at top levels.
  • Impact of Market Orders:

    • Track how large market orders impact the bid-ask spread.
    • If a small order significantly changes the spread, liquidity is thin at the top.
  • Order Flow Imbalance:

    • Compute cumulative net order flow over time: $$ OF_t = \sum (V_{buy} - V_{sell}) $$
    • If order flow is imbalanced but the spread remains volatile, liquidity must be deeper in the book.
  • Latency Effects:

    • Look for delays in liquidity replenishment (e.g., bid orders disappear after a large trade).
    • Sudden spread widening suggests market makers are slow to adjust.

2) Bayesian Model for Liquidity & Spread¶

A Bayesian state-space model can estimate the true latent liquidity profile and spread dynamics.

Hidden States:¶

  • $ L_t $: True market liquidity at time $ t $.
  • $ S_t $: True bid-ask spread at time $ t $.

Observations:¶

  • $ O_t $: Observed spread and order book depth.
  • $ V_t $: Volume at each price level.

State Transition Model:¶

  • $$ L_t \sim \mathcal{N}(L_{t-1}, \sigma_L^2) $$
    (Liquidity follows a stochastic process)
  • $$ S_t \sim \mathcal{N}(S_{t-1} + \alpha L_t, \sigma_S^2) $$
    (Spread depends on liquidity)

Observation Model:¶

  • $$ O_t \sim \mathcal{N}(S_t, \sigma_O^2) $$
    (Observed spread is a noisy version of the true spread)
  • $$ V_t \sim \mathcal{N}(L_t, \sigma_V^2) $$
    (Volume is a noisy proxy for liquidity)

Inference:¶

Use Markov Chain Monte Carlo (MCMC) or Variational Inference to estimate the posterior distribution of $L_t$ and $S_t$.

In [51]:
# Modified Simulation: Liquidity Shocks and Nonlinear Spread Response
#
# State-space model: latent liquidity L_t follows a random walk with
# occasional sharp crashes; the latent spread S_t widens nonlinearly as
# liquidity thins; O_t and V_t are noisy observations of S_t and L_t.

# Parameters
T = 100        # Time steps
sigma_L = 1.5  # Volatility of liquidity
sigma_S = 0.9  # Volatility of spread
sigma_O = 2.0  # Increased observation noise for spread
sigma_V = 2.0  # Increased observation noise for liquidity
alpha = 2.5    # Stronger influence of liquidity on spread

# Initialize arrays
L_true = np.zeros(T)  # True (latent) liquidity
S_true = np.zeros(T)  # True (latent) spread
O_obs = np.zeros(T)   # Observed spread
V_obs = np.zeros(T)   # Observed volume

# Initial values — the initial state is observed too. (Previously
# O_obs[0] and V_obs[0] were never simulated and stayed 0, which plotted
# as a spurious zero observation at t=0.)
L_true[0] = np.random.normal(5, sigma_L)  # Starting liquidity
S_true[0] = np.random.normal(2, sigma_S)  # Starting spread
O_obs[0] = S_true[0] + np.random.normal(0, sigma_O)
V_obs[0] = L_true[0] + np.random.normal(0, sigma_V)

# Simulate state-space dynamics with liquidity crashes
for t in range(1, T):
    # Introduce liquidity shocks randomly
    if np.random.rand() < 0.1:  # 10% probability of a liquidity crash
        L_true[t] = L_true[t-1] * 0.2  # Liquidity drops sharply
    else:
        L_true[t] = L_true[t-1] + np.random.normal(0, sigma_L)

    # Spread reacts nonlinearly to liquidity (+0.1 guards the division
    # when liquidity is near zero)
    S_true[t] = S_true[t-1] + alpha / (L_true[t] + 0.1) + np.random.normal(0, sigma_S)

    # Observations are the latent states plus Gaussian noise
    O_obs[t] = S_true[t] + np.random.normal(0, sigma_O)
    V_obs[t] = L_true[t] + np.random.normal(0, sigma_V)
# Plot results
# Two stacked panels sharing the x-axis (time step):
#   top    — latent liquidity L_t vs. its noisy volume observation V_t
#   bottom — latent spread S_t vs. its noisy spread observation O_t
fig, ax = plt.subplots(2, 1, figsize=(15, 7), sharex=True)

# Dashed lines show the latent (true) state, dotted lines the observation.
ax[0].plot(L_true, label=r'True Liquidity $(L_t)$', linestyle="--", color="blue", lw= 4)
ax[0].plot(V_obs, label=r'Observed Volume $(V_t)$', linestyle="dotted", color="red", lw = 4)
ax[0].set_ylabel("Liquidity / Volume", fontsize = 15)
ax[0].legend(fontsize = 15)
ax[0].set_title("Liquidity and Observed Volume", fontsize = 25)

ax[1].plot(S_true, label="True Spread (S_t)", linestyle="--", color="green", lw = 3)
ax[1].plot(O_obs, label="Observed Spread (O_t)", linestyle="dotted", color="orange", lw = 3)
ax[1].set_ylabel("Bid-Ask Spread", fontsize = 15)
ax[1].set_xlabel("Time Step", fontsize = 15)
ax[1].legend(fontsize = 15)
ax[1].set_title("Bid-Ask Spread and Observed Spread", fontsize = 25)

plt.tight_layout()
plt.show()
3) Interpreting the Simulation¶

  • First Plot (Liquidity & Observed Volume): This shows the true liquidity $ L_t $ (blue dashed) and the observed volume $ V_t $ (red dotted). The key takeaway is that liquidity can crash suddenly, and observed volume is a noisy proxy that does not perfectly track the true liquidity state. When liquidity drops, market conditions worsen, leading to higher spreads.

  • Second Plot (Bid-Ask Spread & Observed Spread): This illustrates the true spread $ S_t $ (green dashed) and the observed spread $ O_t $ (orange dotted). The key insight is that spreads widen dramatically when liquidity crashes, showing a nonlinear relationship. The observed spread contains noise, making direct market signals unreliable without proper filtering.

  • Liquidity Drives Spread Formation: The model shows that when market liquidity $ L_t $ drops, the bid-ask spread $ S_t $ widens nonlinearly. This captures how thin order books lead to higher trading costs, making it critical for traders to monitor liquidity depth.

  • Predicting Liquidity Crashes: By modeling hidden liquidity as a state-space variable, we can estimate when market makers might pull liquidity. This helps traders anticipate spread widening events and adjust their strategies accordingly.

  • Noise Filtering for Execution: The observed volume $ V_t $ and spread $ O_t $ contain noise, making raw order book data misleading. Using a Bayesian approach, we can infer true liquidity conditions, which improves execution timing for large trades.

  • Exploiting Market Inefficiencies: Traders can profit from transient liquidity shocks by identifying temporary spread dislocations. When the model detects a liquidity-induced spread spike, market makers can provide liquidity at wider spreads for higher returns.


4) How a Quant Profits¶

A trader can exploit this using:

  • Spread Mean Reversion:

    • If the bid-ask spread widens temporarily due to low top-level liquidity, it may revert.
    • A market-making strategy can profit by providing liquidity at wider spreads.
  • Liquidity Detection & Execution:

    • If liquidity is deeper in the book, a trader can use hidden iceberg orders or slice orders to execute without revealing intent.
  • Sniping & Latency Arbitrage:

    • If spreads widen due to slow liquidity updates, fast traders can hit stale quotes before the spread normalizes.
  • Identifying Stop Hunts & Fake Liquidity:

    • If large liquidity appears and disappears quickly, this could be spoofing or stop-hunting behavior.
    • Traders can use order book imbalance signals to predict fake liquidity movements.